% Start from a clean workspace, with no open figures and an empty console.
% Plain CLEAR removes the workspace variables only; unlike CLEAR ALL it
% does not also flush compiled functions, globals and breakpoints, which
% slows the script down and is not needed here.
clear
close all
clc

% --- Configuration ---
inputFile = 'baskervilles.txt';         % text the statistics are gathered from
outputFile = 'output.txt';              % file handed to writeString
allowedChar = [' ' '''', 'a':'z'];      % alphabet: space, apostrophe, a-z
numChar = numel(allowedChar);           % alphabet size
strLength = 1000;                       % length argument passed to the simulators
wordsPerLine = 15;                      % layout argument passed to writeString

%% 1st Order
% Gather order-1 statistics for the allowed characters.
% NOTE(review): counts is presumably one value per entry of allowedChar
% (it is plotted against 1:numChar below) - confirm in letterStatistics.
counts = letterStatistics(inputFile, allowedChar, 1);

%Plot histogram of 1st order letter statistics
figure
bar(1:numChar, counts)
set(gca, 'XTick', 1:numChar, ...
         'XTickLabel', num2cell(allowedChar))
% The bars are centred on 1..numChar, so the axis has to extend past
% numChar; an upper limit of numChar would cut the last bar in half.
set(gca, 'XLim', [0, numChar + 1])

%Simulation
simulatedString = simulateIndep(allowedChar, counts, strLength);

%Output string
% NOTE(review): every section of this script passes the same outputFile to
% writeString; whether later sections overwrite or append to earlier output
% depends on writeString - confirm.
writeString(outputFile, simulatedString, wordsPerLine);

%% 2nd Order
% Gather order-2 statistics for the allowed characters.
counts = letterStatistics(inputFile, allowedChar, 2);

% Build the Markov matrix: arrange the counts as a numChar-by-numChar
% matrix, transpose it, then scale each row by its own sum.
MM = reshape(counts, [numChar, numChar])';
for row = 1:size(MM, 1)
    total = sum(MM(row, :));

    % A row whose sum is zero cannot be normalised; leave it as it is.
    if ~total
        continue
    end
    MM(row, :) = MM(row, :) ./ total;
end

% Show the Markov matrix as a grayscale image, labelling both axes with
% the allowed characters and placing the x axis at the top.
figure
imagesc(MM)
colormap('gray')
charLabels = num2cell(allowedChar);
set(gca, 'XAxisLocation', 'top', ...
         'XTick', 1:numChar, 'XTickLabel', charLabels, ...
         'YTick', 1:numChar, 'YTickLabel', charLabels)

% Simulate a string from the order-2 counts.
simulatedString = simulateMarkov(allowedChar, counts, 2, strLength);

% Hand the simulated string to writeString for output.
writeString(outputFile, simulatedString, wordsPerLine);

%% 3rd Order
% The order is named once and used for both the statistics and the
% simulation call.
modelOrder = 3;
counts = letterStatistics(inputFile, allowedChar, modelOrder);

% Simulate a string from the order-3 counts.
simulatedString = simulateMarkov(allowedChar, counts, modelOrder, strLength);

% Hand the simulated string to writeString for output.
writeString(outputFile, simulatedString, wordsPerLine);

%% 4th Order
% The order is named once and used for both the statistics and the
% simulation call.
modelOrder = 4;
counts = letterStatistics(inputFile, allowedChar, modelOrder);

% Simulate a string from the order-4 counts.
simulatedString = simulateMarkov(allowedChar, counts, modelOrder, strLength);

% Hand the simulated string to writeString for output.
writeString(outputFile, simulatedString, wordsPerLine);
     
